* Reset settings and initialize log file.
* NOTE(review): launch is not defined in this file; it is presumably a project
* helper command. It is paired with the unlaunch call at the end of this script.
launch, path("share/sumstats")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Create a table of summary statistics for the CPS sample.
*-------------------------------------------------------------------------------


* Prepare table headers and footers
*-------------------------------------------------------------------------------

* Write table header
*
* write_header, handle(name) [labelwidth(width)]
*   handle      name of a file handle already opened for writing
*   labelwidth  LaTeX width of the left-hand label column; defaults to 2.5in
*
* Writes the opening of a tabularx environment with one paragraph column for
* row labels followed by four S columns, two rows of column titles, a row of
* column numbers, and a midrule. The file handle is left open.
capture program drop write_header
program define write_header
	syntax, handle(string) [labelwidth(string)]

	* Fall back to the width that was previously hard-coded in the column spec
	if "`labelwidth'" == "" {
		local labelwidth "2.5in"
	}

	* The label column width now follows the labelwidth option
	file write `handle' "\begin{tabularx}{\textwidth}{p{`labelwidth'}SSSS}" _n
	file write `handle' "\toprule" _n
	file write `handle' "& \multicolumn{2}{c}{\textbf{All prime-age}} & \multicolumn{2}{c}{\textbf{Parents (child 6--12)}} \\" _n
	file write `handle' "& {\centering \textbf{\text{Women}}} & {\centering \textbf{\text{Men}}} & {\centering \textbf{\text{Mothers}}} & {\centering \textbf{\text{Fathers}}} \\" _n
	file write `handle' "& {\centering \text{(1)}} & {\centering \text{(2)}} & {\centering \text{(3)}} & {\centering \text{(4)}} \\" _n
	file write `handle' "\midrule" _n
end

* Write table footer
*
* write_footer, handle(name)
*   handle  name of a file handle currently open for writing
*
* Emits the bottom rule and the end of the tabularx environment, then closes
* the file handle. No newline is written after the final line.
capture program drop write_footer
program write_footer
	syntax, handle(string)

	* Both closing lines go out in a single write
	file write `handle' "\bottomrule" _n "\end{tabularx}"

	* The footer is the last thing written, so release the handle here
	file close `handle'
end


* Prepare CPS summary statistics
*-------------------------------------------------------------------------------

* Load the sample
gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

* Indicators for the youngest child in the household.
* The variable names pair youngest == 1, 2, 3 with under 6, 6-12 and 13-17.
generate byte haskid = inlist(youngest, 1, 2, 3)
local band = 0
foreach kidvar in kidunder6 kid6to12 kid13to17 {
	local ++band
	generate byte `kidvar' = (youngest == `band')
}

* Group membership indicators, one per table column:
* gp1 all women, gp2 all men, gp3 women with kid 6-12, gp4 men with kid 6-12
generate byte gp1 = (female == 1)
generate byte gp2 = (female == 0)
generate byte gp3 = (gp1 == 1 & kid6to12 == 1)
generate byte gp4 = (gp2 == 1 & kid6to12 == 1)

* Labor market activity indicators
generate atwork = (empstat == 10)
generate unemp_layoff = (unemp == 1 & whyunemp == 1)
generate unemp_other = (unemp == 1 & !(whyunemp == 1))

* Not-in-labor-force detail is only defined for observations with year >= 1994;
* all other observations are left missing
local from1994 "year >= 1994"
generate nlf1994 = nlf if `from1994'
generate nlf1994_house = (nlf == 1 & whynilf == 4) if `from1994'
generate nlf1994_other = (nlf == 1 & !(whynilf == 4)) if `from1994'

* Rescale every share variable so it is reported as a percentage
local share_vars spouse_present haskid kidunder6 kid6to12 kid13to17 emp atwork absent unemp unemp_layoff unemp_other nlf nlf1994 nlf1994_house nlf1994_other
foreach share of varlist `share_vars' {
	quietly replace `share' = `share' * 100
}

* Table-ready variable labels.
* hspace indents a top-level row by 1em; hspace2 indents a sub-row by 2em.
local hspace "\hspace{1em}"
local hspace2 "`hspace'`hspace'"

label variable age            "`hspace'Age"
label variable spouse_present "`hspace'Married, spouse present"
label variable haskid         "`hspace'Own child \$<18\$ in household"
label variable kidunder6      "`hspace2'Youngest \$<6\$ years old"
label variable kid6to12       "`hspace2'Youngest 6--12 years old"
label variable kid13to17      "`hspace2'Youngest 13--17 years old"

label variable emp            "`hspace'Employed"
label variable atwork         "`hspace2'At work during reference week"
label variable absent         "`hspace2'Absent during reference week"

label variable unemp          "`hspace'Unemployed"
label variable unemp_layoff   "`hspace2'Temporary layoff"
label variable unemp_other    "`hspace2'Other reason unemployed"

label variable nlf            "`hspace'Not in labor force"
label variable nlf1994        "`hspace'Not in labor force (1994 or later)"
label variable nlf1994_house  "`hspace2'Taking care of house or family"
label variable nlf1994_other  "`hspace2'Other major activity"

label variable hours          "`hspace'Hours worked in reference week"


* Create a table of summary statistics
*-------------------------------------------------------------------------------

* Open the output file, closing any stale handle left by an earlier run,
* and write the table header
capture file close cps
file open cps using "$basepath/output/sumstats.tex", write replace
write_header, handle("cps") labelwidth("2.5in")

* Row variables in table order
local row_vars age spouse_present haskid kidunder6 kid6to12 kid13to17 emp atwork absent unemp unemp_layoff unemp_other nlf nlf1994 nlf1994_house nlf1994_other hours

* Rows that also report a standard deviation, and rows followed by extra space
local sd_vars age hours
local gap_vars age spouse_present kid13to17 absent unemp_other nlf nlf1994_other hours

* One table row per variable
foreach rowvar of varlist `row_vars' {
	* Panel titles precede the first variable of each panel
	local panel ""
	if "`rowvar'" == "age" {
		local panel "Demographics"
	}
	if "`rowvar'" == "emp" {
		local panel "Labor market activity"
	}
	if "`panel'" != "" {
		file write cps "\textbf{`panel'} & & & & \\[1em]" _n
	}

	* Row label comes from the variable label
	local rowlabel : variable label `rowvar'

	* Weighted mean and standard deviation within each of the four groups
	foreach g of numlist 1/4 {
		quietly summarize `rowvar' if gp`g' == 1 [aweight = wtfinl]
		local avg`g' = string(r(mean), "%9.1f")
		local dev`g' = string(r(sd), "%9.1f")
	}

	* Means row; the line is left open so spacing can be appended below
	file write cps "`rowlabel' & `avg1' & `avg2' & `avg3' & `avg4' \\"

	* Standard deviations go on a second line for the selected variables
	local show_sd : list rowvar in sd_vars
	if `show_sd' {
		file write cps _n "& (`dev1') & (`dev2') & (`dev3') & (`dev4') \\"
	}

	* Extra vertical space after the selected variables
	local add_gap : list rowvar in gap_vars
	if `add_gap' {
		file write cps "[1em]"
	}

	* Finish the line
	file write cps _n
}

* Record the sample size: unweighted number of observations in each group
foreach g of numlist 1/4 {
	quietly count if gp`g' == 1
	local nobs`g' = r(N)
}

file write cps "\textbf{Observations} & `nobs1' & `nobs2' & `nobs3' & `nobs4' \\" _n

* Close the table; write_footer also closes the file handle
write_footer, handle("cps")

* Close the log file
unlaunch
